# See CausalInferenceA for data provenance and the earlier analysis.
# Load the yield-monitor data sets, grouped by crop type and ordered by year.
# Harvests: soybeans (2013, 2017, 2019), wheat (2015), corn (2016, 2018, 2020);
# seeding records exist for the 2018 and 2020 corn crops only.
harvest.2013.dat <- read.csv(file = './data/A 2013 Soybeans Harvest.csv')
harvest.2015.dat <- read.csv(file = './data/A 2015 Wheat Harvest.csv')
harvest.2016.dat <- read.csv(file = './data/A 2016 Corn Harvest.csv')
harvest.2017.dat <- read.csv(file = './data/A 2017 Soybeans Harvest.csv')
harvest.2018.dat <- read.csv(file = './data/A 2018 Corn Harvest.csv')
harvest.2019.dat <- read.csv(file = './data/A 2019 Soybeans Harvest.csv')
harvest.2020.dat <- read.csv(file = './data/A 2020 Corn Harvest.csv')
seed.2018.dat <- read.csv(file = './data/A 2018 Corn Seeding.csv')
seed.2020.dat <- read.csv(file = './data/A 2020 Corn Seeding.csv')
# Screen each data set for outliers before gridding and modeling.
# Remove outliers from every loaded data set, reassigning each in place.
# The two seeding data sets and seven harvest data sets use separate
# project-supplied screening functions.
for (yr in c(2018, 2020)) {
  nm <- paste0("seed.", yr, ".dat")
  assign(nm, remove.seed.outliers.fn(get(nm)))
}
for (yr in c(2013, 2015, 2016, 2017, 2018, 2019, 2020)) {
  nm <- paste0("harvest.", yr, ".dat")
  assign(nm, remove.harvest.outliers.fn(get(nm)))
}
# Overlay a common grid on the field: pool every harvest year, then bin each
# observation's coordinates into grid.size-unit cells.
grid.size <- 50
all.harvest.dat <- rbind(
  harvest.2013.dat, harvest.2015.dat, harvest.2016.dat,
  harvest.2017.dat, harvest.2018.dat, harvest.2019.dat,
  harvest.2020.dat
)
all.harvest.dat$LatCell <- as.factor(floor(all.harvest.dat$Latitude / grid.size))
all.harvest.dat$LonCell <- as.factor(floor(all.harvest.dat$Longitude / grid.size))
# `:` on two factors builds the interaction, giving cell labels like "0:7".
all.harvest.dat$Cell <- all.harvest.dat$LonCell:all.harvest.dat$LatCell
# Per-cell summaries across all years: mean Easting (common.dat), mean
# Northing (Lats), and the number of yield observations per cell (Counts).
# Row names are set to the cell label so the tables can be joined by name.
common.dat <- aggregate(Longitude ~ Cell, data = all.harvest.dat,mean)
row.names(common.dat) <- common.dat$Cell
Lats <- aggregate(Latitude ~ Cell, data = all.harvest.dat,mean)
row.names(Lats) <- Lats$Cell
# aggregate(..., length) makes Counts$Yield the observation count per cell.
Counts <- aggregate(Yield ~ Cell, data = all.harvest.dat,length)
row.names(Counts) <- Counts$Cell
head(Counts)
## Cell Yield
## 0:7 0:7 399
## 0:8 0:8 683
## 0:9 0:9 668
## 0:10 0:10 719
## 0:11 0:11 716
## 0:12 0:12 230
# Join the per-cell latitude and count tables onto common.dat by cell row
# name, then drop sparsely observed cells.
common.dat$Latitude <- Lats[row.names(common.dat), 'Latitude']
common.dat$Counts <- Counts[row.names(common.dat), 'Yield']
head(common.dat)
## Cell Longitude Latitude Counts
## 0:7 0:7 28.43886 382.1674 399
## 0:8 0:8 27.94464 425.2921 683
## 0:9 0:9 26.94234 474.8385 668
## 0:10 0:10 27.99323 525.3938 719
## 0:11 0:11 27.05665 574.8760 716
## 0:12 0:12 28.71218 612.3923 230
# Keep only cells with at least 5 observations. FIX: the column is named
# `Counts`; the original `common.dat$Count` only worked through `$` partial
# matching on data frames (and would silently break under tibbles).
common.dat <- common.dat[common.dat$Counts > 4, ]
# See CausalInferenceA for the models, and YieldDataModels/ModelSelection for
# the model choice. Here we use bs() with 12 df.
# Basis splines: smooth each year's yield (and the 2018/2020 seeding rates)
# onto the common grid using a tensor product of B-spline bases with 12 df
# per coordinate (chosen in YieldDataModels/ModelSelection).
df <- 12

# Fit response ~ bs(Longitude,df)*bs(Latitude,df) on `dat` and predict at the
# common-grid cell centers. `df` is found in the enclosing (global) scope.
# predict() warns for these fields — "prediction from a rank-deficient fit
# may be misleading" and, for years whose footprint is smaller than the grid,
# "'x' values beyond boundary knots may cause ill-conditioned bases"; both
# are expected here.
bs.smooth.fn <- function(dat, response) {
  fml <- as.formula(paste0(response, " ~ bs(Longitude,df=df)*bs(Latitude,df=df)"))
  fit <- lm(fml, data = dat)
  predict(fit, newdata = common.dat[, c("Longitude", "Latitude")])
}

common.dat$Y18 <- bs.smooth.fn(harvest.2018.dat, "Yield")
hist(common.dat$Y18)
common.dat$Y17 <- bs.smooth.fn(harvest.2017.dat, "Yield")
common.dat$Y16 <- bs.smooth.fn(harvest.2016.dat, "Yield")
common.dat$Y15 <- bs.smooth.fn(harvest.2015.dat, "Yield")
common.dat$Y13 <- bs.smooth.fn(harvest.2013.dat, "Yield")
common.dat$Y19 <- bs.smooth.fn(harvest.2019.dat, "Yield")
common.dat$Y20 <- bs.smooth.fn(harvest.2020.dat, "Yield")
common.dat$AR18 <- bs.smooth.fn(seed.2018.dat, "AppliedRate")
common.dat$R18 <- bs.smooth.fn(seed.2018.dat, "ControlRate")
common.dat$R20 <- bs.smooth.fn(seed.2020.dat, "ControlRate")
common.dat$AR20 <- bs.smooth.fn(seed.2020.dat, "AppliedRate")
# Relative yield ranks for 2013 and 2015 (rank scaled to (0, 1]), mapped
# side by side on the common grid.
common.dat$Y13r <- rank(common.dat$Y13)
common.dat$Y13r <- common.dat$Y13r / max(common.dat$Y13r)
common.dat$Y15r <- rank(common.dat$Y15)
common.dat$Y15r <- common.dat$Y15r / max(common.dat$Y15r)
Maps <- data.frame(Longitude = c(common.dat$Longitude, common.dat$Longitude),
                   Latitude = c(common.dat$Latitude, common.dat$Latitude),
                   Value = c(common.dat$Y13r, common.dat$Y15r),
                   Map = c(rep('Y13r', length(common.dat$Y13r)),
                           rep('Y15r', length(common.dat$Y15r))))
ggplot(Maps, aes(Longitude, Latitude)) +
  geom_point(aes(colour = Value), size = 0.5) +
  scale_colour_gradient2(low = vermillion, mid = yellow, high = blue, midpoint = 0.5) +
  # FIX: the original title said "Seeding and Yield (2018)" but the two
  # panels are the 2013 and 2015 yield ranks.
  labs(colour = "Relative Rank", x = "Easting", y = "Northing",
       title = "Relative Yield Ranks (2013 and 2015)") + facet_wrap(~ Map)
# Model 4: the chain Y17 -> R18 -> AR18 -> Y18, with Y17 also a direct parent
# of Y18. Fit the Gaussian network, then score arcs with frequentist
# strengths (arc.strength) and Bayes factors (bf.strength).
model4.vars <- c('Y18', 'R18', 'Y17', 'AR18')
model4.dag <- model2network("[Y17][R18|Y17][AR18|R18][Y18|AR18:Y17]")
graphviz.plot(model4.dag)
fit1 <- bn.fit(model4.dag, common.dat[, model4.vars])
fit1
strength4 <- arc.strength(model4.dag, common.dat[, model4.vars])
strength.plot(model4.dag, strength4)
bf.strength4 <- bf.strength(model4.dag, common.dat[, model4.vars])
strength.plot(model4.dag, bf.strength4)
averaged.network(bf.strength4)
plot(bf.strength4)
strength4
bf.strength4
# Learn a structure from the same four variables for comparison.
learned.dag <- iamb(common.dat[, model4.vars])
graphviz.plot(learned.dag)
#learned.dag = set.arc(learned.dag, from = "Y17", to = "Y18")
##learned.dag = set.arc(learned.dag, from = "Y17", to = "R18")
#learned.dag = set.arc(learned.dag, from = "R18", to = "Y18")
#graphviz.plot(learned.dag)
#fit1 = bn.fit(learned.dag, common.dat[,c('Y18','R18','Y17')])
#fit1
# Model 6 drawn as a causal DAG for ggdag: the year-to-year yield chain plus
# a latent site effect RS feeding every yield year and the 2018 control rate.
model6_dag <- dagify(Y18 ~ AR18,
Y18 ~ Y17,
AR18 ~ R18,
R18 ~ RS,
Y18 ~ RS,
Y17 ~ Y16,
Y16 ~ Y15,
Y15 ~ RS,
Y16 ~ RS,
Y17 ~ RS,
labels = c("Y18" = "Yield\n 2018",
"R18" = "Control Rate",
"Y17" = "Yield \n2017"),
# RS (site effect) is declared latent: it is unobserved in the data.
latent = "RS",
#exposure = "smoking",
outcome = "Y18")
#ggdag(model6_dag, text = TRUE, use_labels = "label")
ggdag(model6_dag, text = TRUE)
# GAM alternative: thin-plate smooth of the coordinates per year/variable,
# predicted at the 2018 seeding points.
# FIX: library() instead of require() — require() only warns and returns
# FALSE when mgcv is missing, so failures would surface later and obscurely.
library(mgcv)
k <- 160          # basis dimension for s(Longitude, Latitude)
s <- 2^(-5)
useGAM <- FALSE
# nearly every path is significant
KRIG <- TRUE      # used below: kriging (TRUE) vs inverse-distance weighting
LM <- FALSE
d <- 21
gam.dat <- seed.2018.dat

# Fit response ~ s(Longitude, Latitude, k = k) on `dat` and predict at the
# seeding-point coordinates held in gam.dat. `k` resolves to the global above.
gam.smooth.fn <- function(dat, response) {
  fml <- as.formula(paste0(response, " ~ s(Longitude,Latitude,k=k)"))
  fit <- gam(fml, data = dat)
  predict(fit, newdata = gam.dat[, c("Longitude", "Latitude")])
}

gam.dat$Y18 <- gam.smooth.fn(harvest.2018.dat, "Yield")
gam.dat$Y17 <- gam.smooth.fn(harvest.2017.dat, "Yield")
gam.dat$Y16 <- gam.smooth.fn(harvest.2016.dat, "Yield")
gam.dat$Y15 <- gam.smooth.fn(harvest.2015.dat, "Yield")
gam.dat$Y13 <- gam.smooth.fn(harvest.2013.dat, "Yield")
gam.dat$Y19 <- gam.smooth.fn(harvest.2019.dat, "Yield")
gam.dat$R20 <- gam.smooth.fn(seed.2020.dat, "ControlRate")
gam.dat$Y20 <- gam.smooth.fn(harvest.2020.dat, "Yield")
# Natural splines alternative: the same tensor-product surface built with
# ns() instead of bs(), predicted at the 2018 seeding coordinates.
i <- 9
ns.df <- 1 + 2 * (i - 1)   # 17 df per coordinate; hoisted from each formula

# Fit response ~ ns(Longitude,df)*ns(Latitude,df) on `dat` and predict at
# the seeding-point coordinates in ns.dat.
ns.smooth.fn <- function(dat, response) {
  fml <- as.formula(paste0(response,
                           " ~ ns(Longitude,df=ns.df)*ns(Latitude,df=ns.df)"))
  fit <- lm(fml, data = dat)
  predict(fit, newdata = ns.dat[, c("Longitude", "Latitude")])
}

ns.dat <- seed.2018.dat
ns.dat$Y18 <- ns.smooth.fn(harvest.2018.dat, "Yield")
ns.dat$Y17 <- ns.smooth.fn(harvest.2017.dat, "Yield")
ns.dat$Y16 <- ns.smooth.fn(harvest.2016.dat, "Yield")
ns.dat$Y15 <- ns.smooth.fn(harvest.2015.dat, "Yield")
ns.dat$Y13 <- ns.smooth.fn(harvest.2013.dat, "Yield")
ns.dat$Y19 <- ns.smooth.fn(harvest.2019.dat, "Yield")
ns.dat$R20 <- ns.smooth.fn(seed.2020.dat, "ControlRate")
# Map the common-grid yields for all six harvest years as relative ranks
# (rank scaled to (0, 1]), one facet per year.
map.dat <- common.dat
map.years <- c('Y19', 'Y18', 'Y17', 'Y16', 'Y15', 'Y13')
for (yc in map.years) {
  map.dat[[yc]] <- rank(map.dat[[yc]])
  map.dat[[yc]] <- map.dat[[yc]] / max(map.dat[[yc]])
}
# Stack the six yield columns long-form, tagging each copy with its year.
n.cells <- nrow(map.dat)
Maps6 <- data.frame(
  Longitude = rep(map.dat$Longitude, times = length(map.years)),
  Latitude = rep(map.dat$Latitude, times = length(map.years)),
  Value = c(map.dat$Y19, map.dat$Y18, map.dat$Y17,
            map.dat$Y16, map.dat$Y15, map.dat$Y13),
  Map = rep(c(2019, 2018, 2017, 2016, 2015, 2013), each = n.cells)
)
ggplot(Maps6, aes(Longitude, Latitude)) +
  geom_point(aes(colour = Value), size = .5) +
  scale_colour_gradient2(low = vermillion, mid = yellow, high = blue, midpoint = 0.5) +
  labs(colour = "Relative Rank", x = "Easting", y = "Northing",
       title = "Multiple Years") + facet_wrap(~ Map)
# Model 5: year-to-year yield chain in which R18 responds to the three prior
# yields and Y18 responds to the applied rate plus the previous year.
mapped.dat <- common.dat
model5.dat <- common.dat[, c('Y18', "R18", 'AR18', "Y17", 'Y16', 'Y15')]
model5.dag <- model2network("[Y15][Y16|Y15][Y17|Y16][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17]")
#graphviz.plot(model5.dag,layout = "dot")
#graphviz.plot(model5.dag,layout = "fdp")
graphviz.plot(model5.dag, layout = "circo")
## Loading required namespace: Rgraphviz
model5.fit <- bn.fit(model5.dag, model5.dat)
model5.fit
##
## Bayesian network parameters
##
## Parameters of node AR18 (Gaussian distribution)
##
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept) R18
## 7819.095715 0.708718
## Standard deviation of the residuals: 595.3269
##
## Parameters of node R18 (Gaussian distribution)
##
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
## (Intercept) Y15 Y16 Y17
## 13170.310219 11.688495 -3.943456 238.455726
## Standard deviation of the residuals: 810.0274
##
## Parameters of node Y15 (Gaussian distribution)
##
## Conditional density: Y15
## Coefficients:
## (Intercept)
## 34.29315
## Standard deviation of the residuals: 10.88382
##
## Parameters of node Y16 (Gaussian distribution)
##
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept) Y15
## 120.8531964 0.1483424
## Standard deviation of the residuals: 12.83198
##
## Parameters of node Y17 (Gaussian distribution)
##
## Conditional density: Y17 | Y16
## Coefficients:
## (Intercept) Y16
## 50.58682189 0.05594438
## Standard deviation of the residuals: 2.50968
##
## Parameters of node Y18 (Gaussian distribution)
##
## Conditional density: Y18 | AR18 + Y17
## Coefficients:
## (Intercept) AR18 Y17
## -109.49460929 0.01168506 0.48117522
## Standard deviation of the residuals: 27.09955
#bn.fit.qqplot(model5.fit)
# Score model 5's arcs two ways: frequentist strengths (p-values; smaller is
# stronger) and Bayes-factor strengths, then average over the BF table.
strength5 <- arc.strength(model5.dag, model5.dat)
strength.plot(model5.dag, strength5,layout = "circo")
bf.strength5 <- bf.strength(model5.dag, model5.dat)
## Loading required namespace: Rmpfr
strength.plot(model5.dag, bf.strength5)
averaged.network(bf.strength5)
##
## Random/Generated Bayesian network
##
## model:
## [R18][Y15][Y16][Y17|R18:Y15:Y16][Y18|R18][AR18|R18:Y17]
## nodes: 6
## arcs: 6
## undirected arcs: 0
## directed arcs: 6
## average markov blanket size: 3.00
## average neighbourhood size: 2.00
## average branching factor: 1.00
##
## generation algorithm: Model Averaging
## significance threshold: 0.4057388
plot(bf.strength5)
# Print the raw strength tables for model 5 (p-values, then Bayes factors).
strength5
## from to strength
## 1 Y15 Y16 5.496561e-02
## 2 Y16 Y17 1.574890e-05
## 3 Y17 R18 1.741050e-22
## 4 Y16 R18 3.561959e-01
## 5 Y15 R18 2.259245e-02
## 6 R18 AR18 3.115335e-49
## 7 AR18 Y18 1.856984e-05
## 8 Y17 Y18 6.205303e-01
bf.strength5
## from to strength direction
## 1 AR18 R18 1.000000e+00 3.643412e-24
## 2 AR18 Y15 2.959340e-06 0.000000e+00
## 3 AR18 Y16 3.009519e-06 0.000000e+00
## 4 AR18 Y17 1.000000e+00 0.000000e+00
## 5 AR18 Y18 4.057388e-01 1.853371e-02
## 6 R18 AR18 1.000000e+00 1.000000e+00
## 7 R18 Y15 2.786016e-05 0.000000e+00
## 8 R18 Y16 2.419826e-06 0.000000e+00
## 9 R18 Y17 1.000000e+00 9.992223e-01
## 10 R18 Y18 1.000000e+00 1.000000e+00
## 11 Y15 AR18 2.959340e-06 1.000000e+00
## 12 Y15 R18 2.786016e-05 1.000000e+00
## 13 Y15 Y16 1.942213e-03 5.000000e-01
## 14 Y15 Y17 9.028457e-01 1.000000e+00
## 15 Y15 Y18 5.635342e-05 1.000000e+00
## 16 Y16 AR18 3.009519e-06 1.000000e+00
## 17 Y16 R18 2.419826e-06 1.000000e+00
## 18 Y16 Y15 1.942213e-03 5.000000e-01
## 19 Y16 Y17 9.126284e-01 8.466694e-01
## 20 Y16 Y18 6.804326e-05 1.000000e+00
## 21 Y17 AR18 1.000000e+00 1.000000e+00
## 22 Y17 R18 1.000000e+00 7.777172e-04
## 23 Y17 Y15 9.028457e-01 0.000000e+00
## 24 Y17 Y16 9.126284e-01 1.533306e-01
## 25 Y17 Y18 4.041052e-04 1.000000e+00
## 26 Y18 AR18 4.057388e-01 9.814663e-01
## 27 Y18 R18 1.000000e+00 0.000000e+00
## 28 Y18 Y15 5.635342e-05 0.000000e+00
## 29 Y18 Y16 6.804326e-05 0.000000e+00
## 30 Y18 Y17 4.041052e-04 0.000000e+00
# Model 6: richer DAG than model 5 — Y17 now has both Y15 and Y16 as parents,
# and Y18 conditions on all three prior yields as well as AR18.
model6.dat <- common.dat[,c('Y18',"R18",'AR18',"Y17",'Y16','Y15')]
model6.dag <- model2network("[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17:Y16:Y15]")
model6.dag
##
## Random/Generated Bayesian network
##
## model:
## [Y15][Y16|Y15][Y17|Y15:Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y15:Y16:Y17]
## nodes: 6
## arcs: 11
## undirected arcs: 0
## directed arcs: 11
## average markov blanket size: 4.67
## average neighbourhood size: 3.67
## average branching factor: 1.83
##
## generation algorithm: Empty
model6.fit = bn.fit(model6.dag, model6.dat)
model6.fit
##
## Bayesian network parameters
##
## Parameters of node AR18 (Gaussian distribution)
##
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept) R18
## 7819.095715 0.708718
## Standard deviation of the residuals: 595.3269
##
## Parameters of node R18 (Gaussian distribution)
##
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
## (Intercept) Y15 Y16 Y17
## 13170.310219 11.688495 -3.943456 238.455726
## Standard deviation of the residuals: 810.0274
##
## Parameters of node Y15 (Gaussian distribution)
##
## Conditional density: Y15
## Coefficients:
## (Intercept)
## 34.29315
## Standard deviation of the residuals: 10.88382
##
## Parameters of node Y16 (Gaussian distribution)
##
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept) Y15
## 120.8531964 0.1483424
## Standard deviation of the residuals: 12.83198
##
## Parameters of node Y17 (Gaussian distribution)
##
## Conditional density: Y17 | Y15 + Y16
## Coefficients:
## (Intercept) Y15 Y16
## 49.24033167 0.06409793 0.04918221
## Standard deviation of the residuals: 2.417094
##
## Parameters of node Y18 (Gaussian distribution)
##
## Conditional density: Y18 | AR18 + Y15 + Y16 + Y17
## Coefficients:
## (Intercept) AR18 Y15 Y16 Y17
## -116.85558972 0.01190639 0.02536271 0.12130809 0.22570330
## Standard deviation of the residuals: 27.1727
#bn.fit.qqplot(model6.fit)
# Arc strengths for model 6, frequentist and Bayes-factor, as done above for
# model 5, followed by model averaging over the BF table.
strength6 <- arc.strength(model6.dag, model6.dat)
strength.plot(model6.dag, strength6)
bf.strength6 <- bf.strength(model6.dag, model6.dat)
strength.plot(model6.dag, bf.strength6)
averaged.network(bf.strength6)
##
## Random/Generated Bayesian network
##
## model:
## [partially directed graph]
## nodes: 6
## arcs: 6
## undirected arcs: 1
## directed arcs: 5
## average markov blanket size: 2.00
## average neighbourhood size: 2.00
## average branching factor: 0.83
##
## generation algorithm: Model Averaging
## significance threshold: 0.406479
plot(bf.strength6)
# Print the raw strength tables for model 6 (p-values, then Bayes factors).
strength6
## from to strength
## 1 Y15 Y16 5.496561e-02
## 2 Y16 Y17 8.711057e-05
## 3 Y15 Y17 1.721409e-05
## 4 Y17 R18 1.741050e-22
## 5 Y16 R18 3.561959e-01
## 6 Y15 R18 2.259245e-02
## 7 R18 AR18 3.115335e-49
## 8 AR18 Y18 1.513771e-05
## 9 Y17 Y18 8.263446e-01
## 10 Y16 Y18 3.997607e-01
## 11 Y15 Y18 8.820933e-01
bf.strength6
## from to strength direction
## 1 AR18 R18 1.000000e+00 3.643412e-24
## 2 AR18 Y15 2.959340e-06 0.000000e+00
## 3 AR18 Y16 3.009519e-06 0.000000e+00
## 4 AR18 Y17 1.000000e+00 0.000000e+00
## 5 AR18 Y18 4.064790e-01 2.154133e-02
## 6 R18 AR18 1.000000e+00 1.000000e+00
## 7 R18 Y15 2.786016e-05 0.000000e+00
## 8 R18 Y16 2.419826e-06 0.000000e+00
## 9 R18 Y17 1.000000e+00 5.000000e-01
## 10 R18 Y18 1.000000e+00 1.000000e+00
## 11 Y15 AR18 2.959340e-06 1.000000e+00
## 12 Y15 R18 2.786016e-05 1.000000e+00
## 13 Y15 Y16 1.942213e-03 5.000000e-01
## 14 Y15 Y17 9.028457e-01 1.000000e+00
## 15 Y15 Y18 5.091052e-05 1.000000e+00
## 16 Y16 AR18 3.009519e-06 1.000000e+00
## 17 Y16 R18 2.419826e-06 1.000000e+00
## 18 Y16 Y15 1.942213e-03 5.000000e-01
## 19 Y16 Y17 7.620855e-01 5.000000e-01
## 20 Y16 Y18 6.147137e-05 1.000000e+00
## 21 Y17 AR18 1.000000e+00 1.000000e+00
## 22 Y17 R18 1.000000e+00 5.000000e-01
## 23 Y17 Y15 9.028457e-01 0.000000e+00
## 24 Y17 Y16 7.620855e-01 5.000000e-01
## 25 Y17 Y18 3.103096e-04 1.000000e+00
## 26 Y18 AR18 4.064790e-01 9.784587e-01
## 27 Y18 R18 1.000000e+00 0.000000e+00
## 28 Y18 Y15 5.091052e-05 0.000000e+00
## 29 Y18 Y16 6.147137e-05 0.000000e+00
## 30 Y18 Y17 3.103096e-04 0.000000e+00
# Compare models 5 and 6 by 10-fold cross-validated Gaussian log-likelihood
# loss; the expected losses are nearly identical (32.31 vs 32.34).
bn.cv(model5.dat, model5.dag)
##
## k-fold cross-validation for Bayesian networks
##
## target network structure:
## [Y15][Y16|Y15][Y17|Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y17]
## number of folds: 10
## loss function: Log-Likelihood Loss (Gauss.)
## expected loss: 32.30852
bn.cv(model6.dat, model6.dag)
##
## k-fold cross-validation for Bayesian networks
##
## target network structure:
## [Y15][Y16|Y15][Y17|Y15:Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y15:Y16:Y17]
## number of folds: 10
## loss function: Log-Likelihood Loss (Gauss.)
## expected loss: 32.3357
#VarCorr(default.gam)
#learned.dag = iamb(common.dat[,c('Y18',"R18","Y17","Y16","Y15")])
# Learn a structure over all available years, then fit a hand-specified
# model 9 that extends model 6 with 2019 yield and the 2020 control rate.
learned.dag <- iamb(common.dat[, c('Y18', 'AR18', "R18", "Y17", "Y16", "Y15", "Y13")])
graphviz.plot(learned.dag)
model9.dag <- model2network("[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17:Y16:Y15][Y19|Y18][R20|Y19:Y18:Y17:Y16:Y15]")
model9.vars <- c('R20', 'Y19', 'Y18', "R18", 'AR18', "Y17", 'Y16', 'Y15')
model9.fit <- bn.fit(model9.dag, common.dat[, model9.vars])
model9.fit
bn.fit.qqplot(model9.fit)
strength9 <- arc.strength(model9.dag, mapped.dat[, model9.vars])
strength9
strength.plot(model9.dag, strength9)
# Model 9, full version: add Y20 and give Y19, R20, and Y20 every earlier
# year as a parent.
model9.dat <- common.dat[, c('Y20', 'R20', 'Y19', 'Y18', "R18", 'AR18', "Y17", 'Y16', 'Y15')]
model9.dag <- model2network(paste0(
  "[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18]",
  "[Y18|AR18:Y17:Y16:Y15][Y19|Y18:Y17:Y16:Y15]",
  "[R20|Y19:Y18:Y17:Y16:Y15][Y20|R20:Y19:Y18:Y17:Y16:Y15]"))
model9.fit <- bn.fit(model9.dag, model9.dat)
model9.fit
##
## Bayesian network parameters
##
## Parameters of node AR18 (Gaussian distribution)
##
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept) R18
## 7819.095715 0.708718
## Standard deviation of the residuals: 595.3269
##
## Parameters of node R18 (Gaussian distribution)
##
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
## (Intercept) Y15 Y16 Y17
## 13170.310219 11.688495 -3.943456 238.455726
## Standard deviation of the residuals: 810.0274
##
## Parameters of node R20 (Gaussian distribution)
##
## Conditional density: R20 | Y15 + Y16 + Y17 + Y18 + Y19
## Coefficients:
## (Intercept) Y15 Y16 Y17 Y18
## 15836.225634 8.362858 -2.394976 -140.813604 50.018882
## Y19
## 179.003682
## Standard deviation of the residuals: 1023.939
##
## Parameters of node Y15 (Gaussian distribution)
##
## Conditional density: Y15
## Coefficients:
## (Intercept)
## 34.29315
## Standard deviation of the residuals: 10.88382
##
## Parameters of node Y16 (Gaussian distribution)
##
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept) Y15
## 120.8531964 0.1483424
## Standard deviation of the residuals: 12.83198
##
## Parameters of node Y17 (Gaussian distribution)
##
## Conditional density: Y17 | Y15 + Y16
## Coefficients:
## (Intercept) Y15 Y16
## 49.24033167 0.06409793 0.04918221
## Standard deviation of the residuals: 2.417094
##
## Parameters of node Y18 (Gaussian distribution)
##
## Conditional density: Y18 | AR18 + Y15 + Y16 + Y17
## Coefficients:
## (Intercept) AR18 Y15 Y16 Y17
## -116.85558972 0.01190639 0.02536271 0.12130809 0.22570330
## Standard deviation of the residuals: 27.1727
##
## Parameters of node Y19 (Gaussian distribution)
##
## Conditional density: Y19 | Y15 + Y16 + Y17 + Y18
## Coefficients:
## (Intercept) Y15 Y16 Y17 Y18
## 27.56697363 -0.08280492 -0.05653656 0.79320593 -0.07986473
## Standard deviation of the residuals: 5.401567
##
## Parameters of node Y20 (Gaussian distribution)
##
## Conditional density: Y20 | R20 + Y15 + Y16 + Y17 + Y18 + Y19
## Coefficients:
## (Intercept) R20 Y15 Y16 Y17
## -159.7271569 0.0116918 0.2581445 0.2919652 -0.2172005
## Y18 Y19
## 0.2242037 -1.3614663
## Standard deviation of the residuals: 9.04632
bn.fit.qqplot(model9.fit)
# Bayes-factor arc strengths for the full model 9. Model averaging drops the
# R20 -> Y19 arc because keeping it would create a cycle (warning below).
bf.strength9 <- bf.strength(model9.dag, model9.dat)
strength.plot(model9.dag, bf.strength9)
averaged.network(bf.strength9)
## Warning in averaged.network.backend(strength = strength, nodes = nodes, : arc
## R20 -> Y19 would introduce cycles in the graph, ignoring.
##
## Random/Generated Bayesian network
##
## model:
## [partially directed graph]
## nodes: 9
## arcs: 18
## undirected arcs: 1
## directed arcs: 17
## average markov blanket size: 5.56
## average neighbourhood size: 4.00
## average branching factor: 1.89
##
## generation algorithm: Model Averaging
## significance threshold: 0.5340344
plot(bf.strength9)
bf.strength9
## from to strength direction
## 1 AR18 R18 1.000000e+00 3.643412e-24
## 2 AR18 R20 3.437085e-07 1.000000e+00
## 3 AR18 Y15 2.959340e-06 0.000000e+00
## 4 AR18 Y16 3.009519e-06 0.000000e+00
## 5 AR18 Y17 1.000000e+00 0.000000e+00
## 6 AR18 Y18 4.064790e-01 2.154133e-02
## 7 AR18 Y19 9.997348e-01 1.000000e+00
## 8 AR18 Y20 5.340344e-01 1.000000e+00
## 9 R18 AR18 1.000000e+00 1.000000e+00
## 10 R18 R20 1.000000e+00 1.000000e+00
## 11 R18 Y15 2.786016e-05 0.000000e+00
## 12 R18 Y16 2.419826e-06 0.000000e+00
## 13 R18 Y17 1.000000e+00 5.000000e-01
## 14 R18 Y18 1.000000e+00 1.000000e+00
## 15 R18 Y19 9.999876e-01 1.000000e+00
## 16 R18 Y20 6.726247e-06 1.000000e+00
## 17 R20 AR18 3.437085e-07 0.000000e+00
## 18 R20 R18 1.000000e+00 0.000000e+00
## 19 R20 Y15 2.969466e-06 0.000000e+00
## 20 R20 Y16 1.155983e-06 0.000000e+00
## 21 R20 Y17 2.106598e-01 0.000000e+00
## 22 R20 Y18 1.000000e+00 0.000000e+00
## 23 R20 Y19 1.000000e+00 5.000000e-01
## 24 R20 Y20 1.000000e+00 5.000000e-01
## 25 Y15 AR18 2.959340e-06 1.000000e+00
## 26 Y15 R18 2.786016e-05 1.000000e+00
## 27 Y15 R20 2.969466e-06 1.000000e+00
## 28 Y15 Y16 1.942213e-03 5.000000e-01
## 29 Y15 Y17 9.028457e-01 1.000000e+00
## 30 Y15 Y18 5.091052e-05 1.000000e+00
## 31 Y15 Y19 5.444680e-03 1.000000e+00
## 32 Y15 Y20 7.793566e-01 1.000000e+00
## 33 Y16 AR18 3.009519e-06 1.000000e+00
## 34 Y16 R18 2.419826e-06 1.000000e+00
## 35 Y16 R20 1.155983e-06 1.000000e+00
## 36 Y16 Y15 1.942213e-03 5.000000e-01
## 37 Y16 Y17 7.620855e-01 5.000000e-01
## 38 Y16 Y18 6.147137e-05 1.000000e+00
## 39 Y16 Y19 1.644025e-03 1.000000e+00
## 40 Y16 Y20 9.998888e-01 1.000000e+00
## 41 Y17 AR18 1.000000e+00 1.000000e+00
## 42 Y17 R18 1.000000e+00 5.000000e-01
## 43 Y17 R20 2.106598e-01 1.000000e+00
## 44 Y17 Y15 9.028457e-01 0.000000e+00
## 45 Y17 Y16 7.620855e-01 5.000000e-01
## 46 Y17 Y18 3.103096e-04 1.000000e+00
## 47 Y17 Y19 9.986654e-01 1.000000e+00
## 48 Y17 Y20 8.877928e-04 1.000000e+00
## 49 Y18 AR18 4.064790e-01 9.784587e-01
## 50 Y18 R18 1.000000e+00 0.000000e+00
## 51 Y18 R20 1.000000e+00 1.000000e+00
## 52 Y18 Y15 5.091052e-05 0.000000e+00
## 53 Y18 Y16 6.147137e-05 0.000000e+00
## 54 Y18 Y17 3.103096e-04 0.000000e+00
## 55 Y18 Y19 1.000000e+00 1.509176e-06
## 56 Y18 Y20 9.998458e-01 1.000000e+00
## 57 Y19 AR18 9.997348e-01 0.000000e+00
## 58 Y19 R18 9.999876e-01 0.000000e+00
## 59 Y19 R20 1.000000e+00 5.000000e-01
## 60 Y19 Y15 5.444680e-03 0.000000e+00
## 61 Y19 Y16 1.644025e-03 0.000000e+00
## 62 Y19 Y17 9.986654e-01 0.000000e+00
## 63 Y19 Y18 1.000000e+00 9.999985e-01
## 64 Y19 Y20 1.000000e+00 1.000000e+00
## 65 Y20 AR18 5.340344e-01 0.000000e+00
## 66 Y20 R18 6.726247e-06 0.000000e+00
## 67 Y20 R20 1.000000e+00 5.000000e-01
## 68 Y20 Y15 7.793566e-01 0.000000e+00
## 69 Y20 Y16 9.998888e-01 0.000000e+00
## 70 Y20 Y17 8.877928e-04 0.000000e+00
## 71 Y20 Y18 9.998458e-01 0.000000e+00
## 72 Y20 Y19 1.000000e+00 0.000000e+00
#VarCorr(default.gam)
#learned.dag = iamb(common.dat[,c('Y18',"R18","Y17","Y16","Y15")])
# Re-learn a structure including the 2020 variables from the mapped data.
learned.dag <- iamb(mapped.dat[, c('R20', 'Y19', 'Y18', 'AR18', "R18", "Y17", "Y16", "Y15", "Y13")])
## Warning in vstruct.apply(arcs = arcs, vs = vs, nodes = nodes, debug = debug):
## vstructure Y19 -> AR18 <- R18 is not applicable, because one or both arcs are
## oriented in the opposite direction.
## Warning in vstruct.apply(arcs = arcs, vs = vs, nodes = nodes, debug = debug):
## vstructure Y19 -> R20 <- R18 is not applicable, because one or both arcs are
## oriented in the opposite direction.
graphviz.plot(learned.dag)
# Interpolate the 2016-2018 harvest yields onto the 2018 seeding locations.
# NOTE(review): `krig.yield.fn` and the logical flag `krig` are defined
# elsewhere in this project; `idw=!krig` presumably switches to
# inverse-distance weighting when kriging is disabled -- confirm.
home.2017.krig <- krig.yield.fn(harvest.2017.dat[,c("Yield","Longitude","Latitude")],
seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
home.krig <- krig.yield.fn(harvest.2018.dat[,c("Yield","Longitude","Latitude")],
seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
home.2016.krig <- krig.yield.fn(harvest.2016.dat[,c("Yield","Longitude","Latitude")],
seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
# Attach the interpolated 2017/2018 yields to the seeding data now, before
# the cached rng=60 objects loaded below overwrite home.krig.
seed.2018.dat$Y17 <- home.2017.krig$Yield
seed.2018.dat$Y18 <- home.krig$Yield
# The rng=60 kriging is expensive, so it is cached to disk: recompute only
# when home.Rda is absent, then always load the cached objects.
# NOTE(review): the rng=20 home.2016.krig computed above is never used
# before this load replaces it -- possibly redundant work; verify.
if(!file.exists("home.Rda")) {
home.krig <- krig.yield.fn(harvest.2018.dat[,c("Yield","Longitude","Latitude")],
seed.2018.dat[,c("Longitude","Latitude")],rng=60)
home.2016.krig <- krig.yield.fn(harvest.2016.dat[,c("Yield","Longitude","Latitude")],
seed.2018.dat[,c("Longitude","Latitude")],rng=60)
save(home.krig,home.2016.krig,file="home.Rda")
}
load(file="home.Rda")
# Disabled diagnostic: empirical variogram with the rng=60 cutoff marked.
# plot(gamma ~ dist, data=home.krig$harvest.var,
# ylim=c(0,max(home.krig$harvest.var$gamma)),col="blue")
# abline(v=60)
# Reshape the per-cell yield table from wide (one column per year) to long
# format. `varying` and `times` must remain in the same year order so each
# Yield value is tagged with its correct Year.
GridLong <- reshape(common.dat,
varying = c('Y18','Y17','Y16','Y15','Y13','Y19'),
v.names = "Yield",
timevar = "Year",
times = c(2018,2017,2016,2015,2013,2019),
direction = "long")
# Quick base-graphics look at the east-west yield trend across all years.
plot(Yield~Longitude,data=GridLong)
# Treat Year as categorical so ggplot assigns one color per year.
GridLong$Year <- factor(GridLong$Year)
# `cbPalette` (color-blind-safe palette) is defined elsewhere in the project.
ggplot(GridLong, aes(Longitude,Yield,color=Year)) + scale_colour_manual(values=cbPalette) +
geom_point(size=2) + geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Plot the achieved yield against both the actual applied seeding rates and the prescribed rates.
# Additive linear model for 2018 yield, entering prior-year yield (Y17)
# before the prescribed seeding rate (R18). Term order does not affect the
# coefficient estimates, only the sequential ANOVA decomposition later.
Model2.lm <- lm(formula = Y18 ~ Y17 + R18, data = mapped.dat)
summary(Model2.lm)
##
## Call:
## lm(formula = Y18 ~ Y17 + R18, data = mapped.dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -232.391 -5.139 1.802 8.420 67.464
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.673e+02 3.608e+01 -7.409 2.33e-12 ***
## Y17 -2.331e+00 6.549e-01 -3.560 0.00045 ***
## R18 2.362e-02 1.642e-03 14.385 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.51 on 233 degrees of freedom
## Multiple R-squared: 0.5214, Adjusted R-squared: 0.5173
## F-statistic: 126.9 on 2 and 233 DF, p-value: < 2.2e-16
# The same additive model with the regressors in the opposite order; the
# fitted coefficients, residuals, and R^2 are identical to Model2.lm.
Model2.alt.lm <- lm(formula = Y18 ~ R18 + Y17, data = mapped.dat)
summary(Model2.alt.lm)
##
## Call:
## lm(formula = Y18 ~ R18 + Y17, data = mapped.dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -232.391 -5.139 1.802 8.420 67.464
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.673e+02 3.608e+01 -7.409 2.33e-12 ***
## R18 2.362e-02 1.642e-03 14.385 < 2e-16 ***
## Y17 -2.331e+00 6.549e-01 -3.560 0.00045 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.51 on 233 degrees of freedom
## Multiple R-squared: 0.5214, Adjusted R-squared: 0.5173
## F-statistic: 126.9 on 2 and 233 DF, p-value: < 2.2e-16
# Reduced model: 2018 yield regressed on 2017 yield alone, as a baseline
# against the two-regressor models above.
Model2.2.lm <- lm(formula = Y18 ~ Y17, data = mapped.dat)
summary(Model2.2.lm)
##
## Call:
## lm(formula = Y18 ~ Y17, data = mapped.dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -340.51 -3.83 4.92 11.94 51.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.032 40.616 0.715 0.475
## Y17 3.516 0.704 4.995 1.15e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.13 on 234 degrees of freedom
## Multiple R-squared: 0.09634, Adjusted R-squared: 0.09248
## F-statistic: 24.95 on 1 and 234 DF, p-value: 1.153e-06
# Sequential ANOVA for the single-regressor baseline.
anova(Model2.2.lm)
## Analysis of Variance Table
##
## Response: Y18
## Df Sum Sq Mean Sq F value Pr(>F)
## Y17 1 19739 19738.5 24.947 1.153e-06 ***
## Residuals 234 185148 791.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interaction models: R18 * Y17 expands to the two main effects plus their
# product, so both orderings below fit the exact same model -- only the
# sequential ANOVA decomposition (shown later) differs.
Model3.lm <- lm(formula = Y18 ~ R18 + Y17 + R18 * Y17, data = mapped.dat)
Model3.alt.lm <- lm(formula = Y18 ~ Y17 + R18 + R18 * Y17, data = mapped.dat)
summary(Model3.lm)
##
## Call:
## lm(formula = Y18 ~ R18 + Y17 + R18 * Y17, data = mapped.dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -205.043 -6.667 0.620 7.562 48.621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.224e+03 6.745e+02 -6.262 1.83e-09 ***
## R18 1.743e-01 2.571e-02 6.781 9.80e-11 ***
## Y17 6.565e+01 1.159e+01 5.663 4.37e-08 ***
## R18:Y17 -2.587e-03 4.405e-04 -5.873 1.48e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.18 on 232 degrees of freedom
## Multiple R-squared: 0.5833, Adjusted R-squared: 0.5779
## F-statistic: 108.3 on 3 and 232 DF, p-value: < 2.2e-16
summary(Model3.alt.lm)
##
## Call:
## lm(formula = Y18 ~ Y17 + R18 + R18 * Y17, data = mapped.dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -205.043 -6.667 0.620 7.562 48.621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.224e+03 6.745e+02 -6.262 1.83e-09 ***
## Y17 6.565e+01 1.159e+01 5.663 4.37e-08 ***
## R18 1.743e-01 2.571e-02 6.781 9.80e-11 ***
## Y17:R18 -2.587e-03 4.405e-04 -5.873 1.48e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.18 on 232 degrees of freedom
## Multiple R-squared: 0.5833, Adjusted R-squared: 0.5779
## F-statistic: 108.3 on 3 and 232 DF, p-value: < 2.2e-16
# anova() on an lm fit reports Type I (sequential) sums of squares: each
# term is assessed after the terms listed before it in the formula, so the
# per-term SS depends on entry order while the residual SS does not.
# Y17 entered first:
anova(Model2.lm)
## Analysis of Variance Table
##
## Response: Y18
## Df Sum Sq Mean Sq F value Pr(>F)
## Y17 1 19739 19739 46.90 6.577e-11 ***
## R18 1 87087 87087 206.93 < 2.2e-16 ***
## Residuals 233 98061 421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R18 entered first: same residual SS (98061), different per-term SS.
anova(Model2.alt.lm)
## Analysis of Variance Table
##
## Response: Y18
## Df Sum Sq Mean Sq F value Pr(>F)
## R18 1 101492 101492 241.153 < 2.2e-16 ***
## Y17 1 5334 5334 12.673 0.0004495 ***
## Residuals 233 98061 421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# With the interaction: its SS (12691) is the same in both orderings
# because the product term is always assessed last.
anova(Model3.lm)
## Analysis of Variance Table
##
## Response: Y18
## Df Sum Sq Mean Sq F value Pr(>F)
## R18 1 101492 101492 275.813 < 2.2e-16 ***
## Y17 1 5334 5334 14.495 0.0001799 ***
## R18:Y17 1 12691 12691 34.489 1.475e-08 ***
## Residuals 232 85370 368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model3.alt.lm)
## Analysis of Variance Table
##
## Response: Y18
## Df Sum Sq Mean Sq F value Pr(>F)
## Y17 1 19739 19739 53.641 3.941e-12 ***
## R18 1 87087 87087 236.667 < 2.2e-16 ***
## Y17:R18 1 12691 12691 34.489 1.475e-08 ***
## Residuals 232 85370 368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Yield distribution within prescribed-rate bins (width 500), with jittered
# raw points underneath for context.
ggplot(mapped.dat, aes(R18, Y18)) +
geom_boxplot(aes(group = cut_width(R18, 500)), outlier.alpha = 0.1) +
geom_jitter(width = 50,alpha=0.1)
# Yield vs prescribed rate with a linear trend line.
# NOTE(review): `color=grey` passes the base function grDevices::grey unless
# a `grey` color object is defined earlier in this project -- confirm a
# quoted "grey" was not intended.
ggplot(mapped.dat, aes(R18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'
# Same plot with the points recolored from the palette.
ggplot(mapped.dat, aes(R18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'
# Yield vs prior-year yield, linear trend.
ggplot(mapped.dat, aes(Y17,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'
# Prescribed rate vs prior-year yield (how the prescription tracked yield).
ggplot(mapped.dat, aes(Y17,R18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'
# Yield vs ACTUAL applied rate, loess smooth.
ggplot(mapped.dat, aes(AR18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1) + geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Yield vs prescribed rate, default smooth with confidence band.
ggplot(mapped.dat, aes(R18,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
geom_smooth()
# Yield vs applied rate, separate smooths per seed variety.
ggplot(mapped.dat, aes(AR18,Y18)) +
geom_point(aes(color=Variety), size=1) + geom_smooth(aes(color=Variety)) + scale_color_manual(values=cbPalette)
# Earlier-year yields against the 2018 prescription and 2018 yield.
ggplot(mapped.dat, aes(Y16,R18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mapped.dat, aes(Y16,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mapped.dat, aes(Y17,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
TODO: optimize the aggregation grid size.
# GAM comparison on the gridded cell data (mgcv): start with purely
# parametric (linear) terms, equivalent to the lm fits above.
red.gam <- gam(Y18 ~ Y17, data=common.dat)
lm.gam <- gam(Y18 ~ R18 + Y17, data=common.dat)
gam.check(red.gam)
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 2 / 2
gam.check(lm.gam)
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 3 / 3
# Now with penalized thin-plate smooths s() instead of linear terms.
red.s.gam <- gam(Y18 ~ s(Y17), data=common.dat)
s.gam <- gam(Y18 ~ s(R18) + s(Y17), data=common.dat)
gam.check(red.s.gam)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 0.0003138598 .
## The Hessian was positive definite.
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Y17) 9.00 1.05 1.02 0.53
gam.check(s.gam)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 8 iterations.
## The RMS GCV score gradient at convergence was 2.673539e-05 .
## The Hessian was positive definite.
## Model rank = 19 / 19
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(R18) 9.00 8.80 1.08 0.92
## s(Y17) 9.00 1.81 0.99 0.42
# Reduced-basis fit. NOTE(review): despite the name, k9.gam uses k=8 basis
# functions here (a k=9 version is fit later with the five-regressor
# models) -- confirm the intended k.
k9.gam <- gam(Y18 ~ s(R18,k=8) + s(Y17,k=8), data=common.dat)
# Compare the three fits: the linear-terms GAM explains ~52% of deviance...
summary(lm.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ R18 + Y17
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.673e+02 3.608e+01 -7.409 2.33e-12 ***
## R18 2.362e-02 1.642e-03 14.385 < 2e-16 ***
## Y17 -2.331e+00 6.549e-01 -3.560 0.00045 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.517 Deviance explained = 52.1%
## GCV = 426.28 Scale est. = 420.86 n = 236
# ...while the smoothed fit reaches ~84%, with s(R18) strongly nonlinear
# (edf near 9) and s(Y17) no longer significant.
summary(s.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18) + s(Y17)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 231.6885 0.7774 298 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 8.805 8.987 117.989 <2e-16 ***
## s(Y17) 1.814 2.312 0.642 0.498
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.836 Deviance explained = 84.4%
## GCV = 150.01 Scale est. = 142.62 n = 236
# The reduced-basis (k=8) version gives nearly the same fit, so the
# defaults are not overfitting badly.
summary(k9.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18, k = 8) + s(Y17, k = 8)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 231.6885 0.8167 283.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 6.973 6.999 135.158 <2e-16 ***
## s(Y17) 1.881 2.398 0.775 0.405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.819 Deviance explained = 82.6%
## GCV = 164.26 Scale est. = 157.4 n = 236
# ANOVA-style term tests for each GAM (approximate for smooth terms).
anova(lm.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ R18 + Y17
##
## Parametric Terms:
## df F p-value
## R18 1 206.93 < 2e-16
## Y17 1 12.67 0.00045
anova(s.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18) + s(Y17)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 8.805 8.987 117.989 <2e-16
## s(Y17) 1.814 2.312 0.642 0.498
anova(k9.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18, k = 8) + s(Y17, k = 8)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 6.973 6.999 135.158 <2e-16
## s(Y17) 1.881 2.398 0.775 0.405
# Extend the GAMs to all available prior-year yields (Y13-Y17).
# NOTE: these reuse the names red.gam / lm.gam / red.s.gam / s.gam / k9.gam,
# overwriting the two-regressor fits above.
red.gam <- gam(Y18 ~ Y17 + Y16 + Y15 + Y13, data=common.dat)
lm.gam <- gam(Y18 ~ R18 + Y17 + Y16 + Y15 + Y13, data=common.dat)
gam.check(red.gam)
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 5 / 5
gam.check(lm.gam)
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 6 / 6
# Smoothed versions of the multi-year models.
red.s.gam <- gam(Y18 ~ s(Y17) + s(Y16) + s(Y15) + s(Y13), data=common.dat)
s.gam <- gam(Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13), data=common.dat)
gam.check(red.s.gam)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 15 iterations.
## The RMS GCV score gradient at convergence was 0.0003430548 .
## The Hessian was positive definite.
## Model rank = 37 / 37
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Y17) 9.00 2.43 1.03 0.67
## s(Y16) 9.00 5.08 0.93 0.15
## s(Y15) 9.00 8.76 1.09 0.89
## s(Y13) 9.00 7.75 0.96 0.28
gam.check(s.gam)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 23 iterations.
## The RMS GCV score gradient at convergence was 5.278616e-06 .
## The Hessian was positive definite.
## Model rank = 46 / 46
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(R18) 9.00 5.66 1.07 0.84
## s(Y17) 9.00 1.00 1.06 0.86
## s(Y16) 9.00 0.93 0.14
## s(Y15) 9.00 1.42 1.09 0.92
## s(Y13) 9.00 4.87 1.05 0.78
# Here k9.gam really does use k=9 (unlike the k=8 fit earlier).
k9.gam <- gam(Y18 ~ s(R18,k=9) + s(Y17,k=9) + s(Y16,k=9) + s(Y15,k=9) + s(Y13,k=9), data=common.dat)
# Summaries of the multi-year fits: linear terms explain ~67% of deviance...
summary(lm.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ R18 + Y17 + Y16 + Y15 + Y13
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -168.78696 33.74676 -5.002 1.13e-06 ***
## R18 0.01314 0.00179 7.344 3.57e-12 ***
## Y17 -0.18385 0.61286 -0.300 0.764
## Y16 -0.11561 0.09450 -1.223 0.222
## Y15 -0.58222 0.11413 -5.101 7.06e-07 ***
## Y13 2.36831 0.24155 9.805 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.663 Deviance explained = 67%
## GCV = 301.4 Scale est. = 293.74 n = 236
# ...while smooths reach 88%, with s(R18), s(Y15) and s(Y13) significant
# and the recent-year smooths effectively linear (edf ~ 1).
summary(s.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 231.6885 0.6874 337.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 5.657 6.633 29.409 <2e-16 ***
## s(Y17) 1.000 1.000 0.268 0.605
## s(Y16) 1.000 1.000 1.864 0.174
## s(Y15) 1.424 1.733 24.170 <2e-16 ***
## s(Y13) 4.874 5.737 31.340 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.872 Deviance explained = 88%
## GCV = 119.05 Scale est. = 111.51 n = 236
# The explicit k=9 fit is essentially identical to the default-basis fit.
summary(k9.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18, k = 9) + s(Y17, k = 9) + s(Y16, k = 9) + s(Y15,
## k = 9) + s(Y13, k = 9)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 231.6885 0.6871 337.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 5.703 6.534 30.054 <2e-16 ***
## s(Y17) 1.000 1.000 0.269 0.605
## s(Y16) 1.000 1.000 1.870 0.173
## s(Y15) 1.404 1.702 24.639 <2e-16 ***
## s(Y13) 4.754 5.541 29.447 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.872 Deviance explained = 88%
## GCV = 118.89 Scale est. = 111.4 n = 236
# Term tests for the multi-year fits (approximate for smooth terms).
anova(lm.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ R18 + Y17 + Y16 + Y15 + Y13
##
## Parametric Terms:
## df F p-value
## R18 1 53.931 3.57e-12
## Y17 1 0.090 0.764
## Y16 1 1.497 0.222
## Y15 1 26.023 7.06e-07
## Y13 1 96.131 < 2e-16
anova(s.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 5.657 6.633 29.409 <2e-16
## s(Y17) 1.000 1.000 0.268 0.605
## s(Y16) 1.000 1.000 1.864 0.174
## s(Y15) 1.424 1.733 24.170 <2e-16
## s(Y13) 4.874 5.737 31.340 <2e-16
anova(k9.gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y18 ~ s(R18, k = 9) + s(Y17, k = 9) + s(Y16, k = 9) + s(Y15,
## k = 9) + s(Y13, k = 9)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(R18) 5.703 6.534 30.054 <2e-16
## s(Y17) 1.000 1.000 0.269 0.605
## s(Y16) 1.000 1.000 1.870 0.173
## s(Y15) 1.404 1.702 24.639 <2e-16
## s(Y13) 4.754 5.541 29.447 <2e-16
# Partial-effect plots with residuals and shaded confidence bands,
# one page per model, for visual comparison of the fitted terms.
plot(lm.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)
plot(s.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)
plot(k9.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)
# Assign each seeding observation to an analysis grid cell
# (`grid.field` is a project helper defined elsewhere).
seed.2018.dat <- grid.field(seed.2018.dat)
# Per-cell mean applied rate, attached by position.
# NOTE(review): this relies on aggregate() returning rows in exactly the
# same Row/Column order as cells.seed.dat (created elsewhere); a merge() on
# Row + Column would be safer -- verify the alignment.
cells.seed.dat$AR18 <- aggregate(AppliedRate ~ Row + Column,data=mapped.dat,FUN=mean,na.rm=TRUE)$AppliedRate
# Causal diagram for Model 2: prior-year yield (Y17) influences both the
# 2018 seeding-rate prescription (R18) and the 2018 yield (Y18), and R18
# influences Y18 -- so Y17 confounds the R18 -> Y18 effect.
Model2_dag <- dagify(
  Y18 ~ R18,
  R18 ~ Y17,
  Y18 ~ Y17,
  labels = c(
    "Y18" = "Yield\n 2018",
    "R18" = "Seeding Rate",
    "Y17" = "Yield \n2017"
  ),
  outcome = "Y18"
)
# Draw the DAG with the human-readable labels instead of node names.
ggdag(Model2_dag, text = FALSE, use_labels = "label")
# Alternative diagram without the Y17 -> R18 arc: both predictors point
# directly at the outcome, so no back-door path remains.
# Note: this reuses (overwrites) the Model2_dag object defined just above.
Model2_dag <- dagify(
  Y18 ~ R18,
  Y18 ~ Y17,
  labels = c(
    "Y18" = "Yield\n 2018",
    "R18" = "Seeding Rate",
    "Y17" = "Yield \n2017"
  ),
  outcome = "Y18"
)
# Draw the simplified DAG with the same label set.
ggdag(Model2_dag, text = FALSE, use_labels = "label")